Built with R version:
3.5.0
Built with R version:
3.5.0
Load necessary libraries
library(affy)
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply,
parSapplyLB
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, basename, cbind, colMeans, colnames, colSums, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl,
intersect, is.unsorted, lapply, lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans,
rownames, rowSums, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which, which.max, which.min
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'.
library(ComplexHeatmap)
Loading required package: grid
========================================
ComplexHeatmap version 1.18.1
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://bioconductor.org/packages/ComplexHeatmap/
If you use it in published research, please cite:
Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
genomic data. Bioinformatics 2016.
========================================
library(plot3D)
library(gplots)
Attaching package: ‘gplots’
The following object is masked from ‘package:stats’:
lowess
library(circlize)
========================================
circlize version 0.4.4
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: http://jokergoo.github.io/circlize_book/book/
If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
in R. Bioinformatics 2014.
========================================
library(AnnotationDbi)
Loading required package: stats4
Loading required package: IRanges
Loading required package: S4Vectors
Attaching package: ‘S4Vectors’
The following object is masked from ‘package:gplots’:
space
The following object is masked from ‘package:base’:
expand.grid
library(limma)
Attaching package: ‘limma’
The following object is masked from ‘package:BiocGenerics’:
plotMA
library(lattice)
library(org.Hs.eg.db)
library(MASS)
Attaching package: ‘MASS’
The following object is masked from ‘package:AnnotationDbi’:
select
library(RColorBrewer)
library(AnnotationDbi)
library(rglwidget)
The functions in the rglwidget package have been moved to rgl.
###library(hgu133plus2hsentrezgcdf)
library(VennDiagram)
Loading required package: futile.logger
library(org.Hs.eg.db)
library(GenomicRanges)
Loading required package: GenomeInfoDb
library(GenomicFeatures)
library(rtracklayer)
library(biomaRt)
library(glmnet)
Loading required package: Matrix
Attaching package: ‘Matrix’
The following object is masked from ‘package:S4Vectors’:
expand
Loading required package: foreach
Loaded glmnet 2.0-16
library(survival)
library(Hmisc)
Loading required package: Formula
Loading required package: ggplot2
Attaching package: ‘Hmisc’
The following object is masked from ‘package:AnnotationDbi’:
contents
The following object is masked from ‘package:Biobase’:
contents
The following objects are masked from ‘package:base’:
format.pval, units
library(ConsensusClusterPlus)
library(pheatmap)
library(ggplot2)
library(heatmap.plus)
library(rgl)
library(caret)
Attaching package: ‘caret’
The following object is masked from ‘package:survival’:
cluster
library(e1071)
Attaching package: ‘e1071’
The following object is masked from ‘package:Hmisc’:
impute
library(tmod)
set1 = c(brewer.pal(9,"Set1"), brewer.pal(8, "Dark2"))
violinJitter <- function(x, magnitude=1){
d <- density(x)
data.frame(x=x, y=runif(length(x),-magnitude/2, magnitude/2) * approxfun(d$x, d$y)(x))
}
rotatedLabel <- function(x0 = seq_along(labels), y0 = rep(par("usr")[3], length(labels)), labels, pos = 1, cex=1, srt=45, ...) {
w <- strwidth(labels, units="user", cex=cex)
h <- strheight(labels, units="user",cex=cex)
u <- par('usr')
p <- par('plt')
f <- par("fin")
xpd <- par("xpd")
par(xpd=NA)
text(x=x0 + ifelse(pos==1, -1,1) * w/2*cos(srt/360*2*base::pi), y = y0 + ifelse(pos==1, -1,1) * w/2 *sin(srt/360*2*base::pi) * (u[4]-u[3])/(u[2]-u[1]) / (p[4]-p[3]) * (p[2]-p[1])* f[1]/f[2] , labels, las=2, cex=cex, pos=pos, adj=1, srt=srt,...)
par(xpd=xpd)
}
avefc = function (y, log=TRUE, replace= FALSE) {
if (log) y = 2^y
if (replace) y = y + (1-min(y))
m = apply(y,1,mean)
y.n = y/m
y.n2 = y.n
y.n2 [y.n2 < 1] = 1/ (y.n2 [y.n2 < 1])
ave.fc = apply (y.n2, 1, mean)
return(ave.fc)
}
For gene convertion from array to HUGO
ensembl = useMart( "ensembl", dataset = "hsapiens_gene_ensembl" )
Upload or generate GEP normalized matrix
### choice 1: import processed matrix
# data.dir="./Rmd.files/"
data.dir = '/Users/emagene/Dropbox/codes/github/PTCL/'
setwd(data.dir)
load (file.path(data.dir,"/Rmd.files/541_PTCL_batch_adjusted_geo.id.Rdata"))
geneExpr = adj.data
# import batch and re-order accordingly
load(file.path(data.dir,"/Rmd.files/PTCL.batch.Rdata"))
batch = batch [order(batch$nameNEW),]
batch.series = as.vector(batch$center)
batch$cancer = "cancer"
# ### OPTIONAL: CHECK BATCH ON FINAL.MOLEC
#
# #mod = model.matrix(~as.factor(center), data=batch)
# mod = model.matrix(~as.factor(final.molec), data=design)
# mod0 = model.matrix(~1, data= batch)
# library(sva)
# n.sv = num.sv(adj.data,mod,method="leek")
# svobj = sva(adj.data,mod,mod0,n.sv=n.sv)
#
# pValues = f.pvalue(adj.data,mod,mod0)
# qValues = p.adjust(pValues,method="BH")
# modSv = cbind(mod,svobj$sv)
# mod0Sv = cbind(mod0,svobj$sv)
# pValuesSv = f.pvalue(adj.data,modSv,mod0Sv)
# qValuesSv = p.adjust(pValuesSv,method="BH")
### end of choice 1
### choice 2: generate your own affy object and custom data
# download CEL files from GEO series GSE6338, GSE19067, GSE19069, GSE40160, GSE58445, GSE65823 and EBI series ETABM702, ETABM783
# GSM368580.CEL, GSM368582.CEL, GSM368584.CEL, GSM368586.CEL, GSM368589.CEL, GSM368591.CEL, GSM368594.CEL, GSM472164.CEL, GSM1411278.CEL, GSM1411284.CEL, GSM1411285.CEL, GSM1411287.CEL, GSM1411355.CEL, GSM1411364.CEL, GSM1411368.CEL, GSM1411425.CEL, GSM1411427.CEL excluded from the analysis (see Methods for explaination")
### celfiles <- dir("~/Documents/DATI/PTCL.nos/GSE6338-GSE19067-GSE19069-GSE40160-GSE58445-GSE65823-ETABM702-ETABM783/", pattern = ".CEL")
### library(affy)
### gset = justRMA(celfile.path = "/Users/emagene/Documents/DATI/PTCL.nos/GSE6338-GSE19067-GSE19069-GSE40160-GSE58445-GSE65823-ETABM702-ETABM783/", ### filenames = celfiles, sampleNames = gsub(".CEL","", celfiles), cdfname = "hgu133plus2hsentrezgcdf")
### geneExpr = exprs(gset)
### batch adjustment
### library(sva)
### # import batch and re-order accordingly
### load("./Rmd.files/PTCL.batch.Rdata")
### batch = batch [order(rownames(batch)),]
### batch.series = as.vector(batch$center)
### geneExprNEW = geneExpr [ , order(colnames(geneExpr)) ]
### geneExprNEW = geneExprNEW[grep("AFFX",rownames(geneExprNEW), invert=TRUE),]
### # check order correspondence and, if correct, adjust data
### if (all(colnames(geneExprNEW) == rownames(batch))) {
### adj.data = ComBat (geneExprNEW, batch.series, mod = NULL, par.prior = TRUE, prior.plots = TRUE)
### } else {
### cat("Error: colnames and batch did not correspond")
### }
### geneExpr = adj.data
### colnames(geneExpr) = as.vector(batch$nameNEW)
### end of choice 2
Upload paz info with clinical and mutational data
pts.info.data <- read.table("./Rmd.files/541_paz_info_MUT.txt", sep="\t", header=TRUE, check.names=FALSE, stringsAsFactors = F)
# customize colors for categories
levels(as.factor(pts.info.data$final.molec))
[1] "AITL" "ALCL.neg" "ALCL.pos" "ATLL" "NKT" "PTCL.nos" "T.CD30" "T.CD4" "T.CD8" "T.DR" "T.reg" "TCR-HL"
# "AITL" "ALCL.neg" "ALCL.pos" "ATLL" "NKT" "PTCL.nos" "T.CD30" "T.CD4" "T.CD8" "T.DR" "T.reg" "TCR-HL"
colorz = c("black", "yellow","dodgerblue2","brown2","darkorchid1", "orange", "grey42", "grey52","grey62","grey72","grey82","grey92")
temp = split ( pts.info.data$sample.nameNEW, pts.info.data$final.molec )
colorx = colnames(geneExpr)
length(colorz)
[1] 12
length(temp)
[1] 12
for (i in 1:length(colorz)) colorx [ which(colorx %in% unlist(temp[i])) ] = colorz[i]
library(gplots)
colorx = col2hex(colorx)
### build design matrix and transform to numerical
design <- pts.info.data[,c(1:2,6:8,14:17)]
rownames(design)<- design[,1]
design<- design[,-c(1:2)]
#design<-na.omit(design) ### select onyl patients with all mutations data available (n=53)
design$age<- as.numeric(as.character(design$age))
design$age<- design$age - median(design$age)
design[design == "WT"] <- 0
design[design == "MUT"] <- 1
design$final.molec[design$final.molec=="AITL"] <- 0
design$final.molec[design$final.molec=="PTCL.nos"] <- 1
design$final.molec[design$final.molec=="ALCL.neg"] <- 2
design$final.molec[design$final.molec=="ALCL.pos"] <- 3
design$final.molec[design$final.molec=="ATLL"] <- 4
design$final.molec[design$final.molec=="NKT"] <- 5
design$final.molec[477:541] <- 6
design$gender[design$gender=="M"] <- 1
design$gender[design$gender=="F"] <- 0
design$age = NULL
all(pts.info.data$sample.nameNEW == batch$nameNEW)
[1] TRUE
slices <- table(pts.info.data$final.molec)
lbls <- names(table(pts.info.data$final.molec))
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, ": ", slices, " (", pct, "%)", sep="" ) # add percents to labels
#pdf("Figure_1a_pie_plot.pdf", width = 5, height = 5)
par(mfrow=c(1,1))
par(mar=c(3,3,3,3), xpd=F)
pie(slices,labels = lbls, init.angle = 0, col=colorz, main="", cex=0.6, radius=0.8)
#dev.off()
# apply variational filter
afc2 = avefc(geneExpr, log=TRUE, replace=FALSE)
data541exprs.vf = geneExpr [afc2 >= 2, ]
dim(data541exprs.vf )
[1] 1840 541
# retry PCA on shorted gene list
data541m = t(as.matrix(data541exprs.vf))
pca<-prcomp(data541m,scale=T)
mfrow3d(nr = 1, nc = 1, sharedMouse = T)
plot3d(pca$x,rgl.use=F,col=colorx,size=0.6,type="s")
rglwidget()
mat = as.matrix(data541exprs.vf)
base_mean = rowMeans(mat)
mat_scaled = t(apply(mat, 1, scale))
types = pts.info.data$final.molec
color.annot = col2hex(colorz); names (color.annot)= names(temp)
ha = HeatmapAnnotation(df = data.frame(type = types) , col = list(type = c( color.annot ) ) )
ha@anno_list[[1]]@color_mapping@colors = col2hex(colorz)
names(ha@anno_list[[1]]@color_mapping@colors) = names(temp)
ht = Heatmap(mat_scaled, name = "expression", km = 7, clustering_method_columns = "ward.D", col = colorRamp2(c(-1, 0, 1), c("green", "white", "red")), top_annotation = ha, top_annotation_height = unit(4, "mm"), show_row_names = FALSE, show_column_names = FALSE)
column_order(ht)
[1] 91 236 85 86 429 88 64 311 270 271 229 231 180 257 87 227 258 304 309 436 431 234 148 147 31 29 450 281 500 521 499 273 272 269 522 520 501 519 498 518 497 505 506 504 486
[46] 485 482 483 484 502 503 524 527 529 528 530 532 531 525 526 523 434 61 182 176 63 83 66 445 230 515 517 513 296 298 516 494 492 496 493 314 297 294 295 293 289 292 512 507 511
[91] 510 509 291 290 514 508 448 447 444 440 396 397 395 394 391 50 51 187 192 433 349 363 385 428 435 427 389 366 491 401 334 459 392 402 388 457 463 446 380 351 474 372 376 489 487
[136] 537 488 534 536 495 490 535 533 481 479 478 480 477 264 261 268 259 262 253 267 265 266 263 65 254 256 255 318 308 320 319 315 324 326 313 280 299 285 316 286 278 344 330 430 331
[181] 275 305 307 136 426 184 162 167 422 421 469 341 245 160 179 153 443 449 181 149 368 186 164 141 194 218 161 98 300 312 84 442 323 301 174 306 248 329 325 178 139 145 142 144 185
[226] 157 249 172 177 158 183 150 170 171 152 131 134 137 216 214 213 241 202 205 203 191 130 133 195 215 244 211 246 238 197 224 226 225 169 538 168 539 540 219 223 220 221 222 206 204
[271] 232 242 200 243 143 250 154 207 198 235 240 247 252 163 155 383 188 239 417 276 406 93 303 339 109 53 124 82 26 32 23 22 20 189 18 398 41 44 10 193 217 129 2 140 208
[316] 251 199 201 209 210 212 233 237 288 284 420 439 274 352 287 441 328 283 282 321 317 310 279 322 332 302 348 419 399 387 166 159 337 467 151 277 355 359 410 353 470 475 466 175 370
[361] 42 411 374 393 404 327 456 94 432 541 156 173 146 165 135 128 106 458 361 364 354 90 454 27 403 138 196 462 453 338 415 407 379 371 464 461 110 79 121 45 405 425 96 102 424
[406] 423 360 452 451 365 358 89 1 28 19 347 101 260 455 59 418 367 48 69 190 333 132 17 378 350 468 346 9 414 409 413 412 408 116 70 122 123 340 381 460 382 77 38 108 37
[451] 228 49 68 67 416 62 92 71 4 103 126 36 120 127 56 74 13 12 72 104 105 100 21 30 25 33 24 465 345 117 343 342 471 336 476 356 15 60 390 357 362 99 97 437 39
[496] 78 438 80 375 40 113 112 52 58 400 57 55 54 76 11 114 73 75 472 3 6 377 119 47 35 125 5 95 81 34 46 43 16 14 386 8 473 335 111 384 7 118 373 369 107
[541] 115
rle.custom = function (a, logged2 = TRUE, file = NULL, colorbox= NULL, labels=NULL , legend = NULL ) {
a.m <- apply(a,1,median)
if (logged2) {
for (i in 1:dim(a)[2]) {
a [,i] <- a [,i] - a.m
}
} else {
for (i in 1:dim(a)[2]) {
a [,i] <- log (a [,i] / a.m )
}
}
# png(file,10240,3840)
par(mar=c(10,4,6,2))
boxplot (a, ylim= c(-5,5), outline=F, col=colorbox, xlab="pts", names=labels, las=2, cex.axis = 1.5, main="RLE", xlim = c(1,600), cex.main = 5 )
legend("bottomright",legend = c(levels(as.factor(pts.info.data$final.molec))),
fill = colorz, # 6:1 reorders so legend order matches graph
title = "Legend",
cex = 5)
# dev.off()
a.c = apply(a, 2, stats::quantile)
return(a.c)
}
#rle.medians = rle.custom(geneExpr, colorbox=colorx, file="./RLE.541.png", labels=pts.info.data$sample.nameNEW )
#plot(rle.medians[3,], type="l", xlab="pts", ylab="RLE median" )
rle.medians = rle.custom(geneExpr, colorbox=colorx, file="./RLE.541.png", labels=pts.info.data$sample.nameNEW )
plot(rle.medians[3,], type="l", xlab="pts", ylab="RLE median" )
Define design file and filter geneExpr for patients included in design data frame and
design <- pts.info.data[,c(1:2,6:8,14:17)]
rownames(design)<- design[,1]
design<- design[,-c(1:2)]
design<-na.omit(design) ### select onyl patients with all mutations data available (n=53)
design$age<- as.numeric(as.character(design$age))
design$age<- design$age - median(design$age)
design[design == "WT"] <- 0
design[design == "MUT"] <- 1
design$final.molec[design$final.molec=="AITL"] <- 0
design$final.molec[design$final.molec=="PTCL.nos"] <- 1
design$gender[design$gender=="M"] <- 1
design$gender[design$gender=="F"] <- 0
design$offset <- rep(1, nrow(design))
design<-design[,c(8,1:7)]
all(pts.info.data$sample.nameNEW == colnames(geneExpr)) ## check correspondence
[1] TRUE
# geneExpr = geneExpr [ , order (pts.info.data$geo.id)] ### do only to set correspondence in case of custom procedure
# colnames(geneExpr) = pts.info.data$sample.nameNEW [ order (pts.info.data$geo.id)]
geneExpr2<- (geneExpr[, rownames(design)])
geneExpr2<- data.matrix(geneExpr2, rownames.force = NA)
design<- data.matrix(design, rownames.force = NA)
We use the lmFit function from the limma package. This comes with a whole series of powerful and reliable tests.
glm = lmFit(geneExpr2[,rownames(design)], design = design )
glm = eBayes(glm)
F.stat <- classifyTestsF(glm[,-1],fstat.only=TRUE)
glm$F <- as.vector(F.stat)
df1 <- attr(F.stat,"df1")
df2 <- attr(F.stat,"df2")
if(df2[1] > 1e6){
glm$F.p.value <- pchisq(df1*glm$F,df1,lower.tail=FALSE)
}else
glm$F.p.value <- pf(glm$F,df1,df2,lower.tail=FALSE)
set.seed(12345678)
rlm <- lmFit(geneExpr[,rownames(design)], apply(design, 2, sample))
rlm <- eBayes(rlm)
F.stat <- classifyTestsF(rlm[,-1],fstat.only=TRUE)
rlm$F <- as.vector(F.stat)
df1 <- attr(F.stat,"df1")
df2 <- attr(F.stat,"df2")
if(df2[1] > 1e6){
rlm$F.p.value <- pchisq(df1*rlm$F,df1,lower.tail=FALSE)
}else
rlm$F.p.value <- pf(rlm$F,df1,df2,lower.tail=FALSE)
F.stat <- classifyTestsF(glm[,2:5],fstat.only=TRUE)
df1 <- attr(F.stat,"df1")
df2 <- attr(F.stat,"df2")
F.p.value <- pchisq(df1*F.stat,df1,lower.tail=FALSE)
R.stat <- classifyTestsF(rlm[,2:5],fstat.only=TRUE)
Rall = 1 - 1/(1 + glm$F * (ncol(design)-1)/(nrow(design)-ncol(design)))
Rgenetics = 1 - 1/(1 + F.stat * 4/(nrow(design)-ncol(design)))
Pgenetics = 1 - 1/(1 + R.stat * 4/(nrow(design)-ncol(design)))
names(Rgenetics) <- names(Pgenetics) <- names(Rall) <- rownames(geneExpr)
par(bty="n", mgp = c(2,.33,0), mar=c(3,2.5,1,1)+.1, las=1, tcl=-.25, xpd=NA)
d <- density(Pgenetics,bw=1e-3)
f <- 40 #nrow(gexpr)/512
#pdf("Figure_2a_MAY.pdf", width = 10, height = 7)
par(mfrow=c(1,1))
par(mar=c(8,5,5,5), xpd=F)
plot(d$x, d$y * f, col='grey', xlab=expression(paste("Explained variance per gene ", R^2)), main="", lwd=2, type="l", ylab="", xlim=c(0,1), cex.axis=1.2, cex.lab=1.5, bty="n")
title(ylab="Density", line=2.5, cex.lab=1.5)
d <- density(Rgenetics, bw=1e-3)
r <- min(Rgenetics[p.adjust(F.p.value,"BH")<0.01]) ######## threshold to select 412 genes
x0 <- which(d$x>r)
polygon(d$x[c(x0[1],x0)], c(0,d$y[x0])* f, col=paste(set1[1],"44",sep=""), border=NA)
lines(d$x, d$y* f, col=set1[1], lwd=2)
text(d$x[x0[1]], d$y[x0[1]]*f +20, pos=4, paste(sum(Rgenetics > r), "genes q < 0.01"))
legend("topright", bty="n", col=c(set1[1], "grey"), lty=1, c("Observed","Random"), lwd=2)
#dev.off()
glmPrediction <- glm$coefficients %*% t(design)
rlmPrediction <- rlm$coefficients %*% t(design)
Print signficiant genes
kk<-as.data.frame((p.adjust(F.p.value,"BH")<0.01))
kk$gene<- rownames(kk)
colnames(kk)[1]<-"code"
kk2<-kk[kk$code=="TRUE",]
### sort(kk2$gene) ##### if you want to print the entire list of differentially expressed genes
Extract the list of differentially expressed genes by mutations
### customize colors in colMutations
# colMutations = c(brewer.pal(8,"Set1")[-6], rev(brewer.pal(8,"Dark2")), brewer.pal(7,"Set2"))[c(1:12,16:19,13:15)]
# o <- order(apply(col2rgb(colMutations),2,rgb2hsv)[1,])
# colMutations <- colMutations[rev(o)][(4*1:19 +15) %% 19 + 1][1:7]
colMutations = col2hex(c("magenta", "purple","gray60","red","lightblue","green","orange"))
names(colMutations) <- colnames(design)[-1]
gene_code<- kk2$gene
tab=NULL
for(i in (1:length(kk2$gene)))
{
gene_single<- gene_code[i]
y <- glm$coefficients[gene_single,-1]+glm$coefficients[gene_single,1]
w <- glm$p.value[gene_single,-1] < 0.05
int<-c(gene_single, as.character(w))
tab<- rbind(tab, int)
}
rownames(tab)<-seq(1:nrow(tab))
colnames(tab)<- c("gene",colnames(design)[-1])
# Write to disk a file with all significant genes
#write.table(tab, "table_differentially_expressed_gene.txt",sep="\t", quote=F, row.names = F, col.names = T)
Example of extraction
# temp_name = unique(getBM( attributes = c("ensembl_transcript_id", "entrezgene", "external_gene_name"), filters = "entrezgene", values = gsub("_at","",gene_single),
# mart = ensembl)$external_gene_name)
#pdf("Figure_2b.pdf", width = 10, height = 7)
par(mfrow=c(1,1))
par(mar=c(10,8,5,5), xpd=F)
par(bty="n", mgp = c(1.5,.33,0),las=1, tcl=-.25, xpd=F)
temp_name<- "YAP1"
plot(glmPrediction[gene_single,], geneExpr[gene_single,rownames(design)], ylab="", xlab="",
pch=16, cex=1, cex.axis=1.2, cex.lab=1.5)
title(ylab=(paste("Observed ",temp_name, " expression")), line=2.5, cex.lab=1.5)
title( xlab=(paste("Predicted ",temp_name, " expression")), line=2.5, cex.lab=1.5)
abline(0,1)
u <- par("usr")
par(xpd=NA)
y <- glm$coefficients[gene_single,-1]+glm$coefficients[gene_single,1]
u <- par("usr")
x0 <- rep(u[3]+1,ncol(design)-1)
y0 <- u[4] + 0.05*(u[4]-u[3]) - rank(-y)/length(y) * (u[4]-u[3])/1.2
d <- density(y)
lines(d$x, d$y/5+1+u[3], col="grey")
lines(d$x, -d$y/5+1+u[3], col="grey")
points(x=y, y=x0+violinJitter(y, magnitude=0.25)$y, col=colMutations, pch=16, cex=1.5)
text(x=glm$coefficients[gene_single,1], y= 5.2, "Model coefficients", cex=0.8)
legend("topleft",names(colMutations), col = colMutations, bty= "n", cex = 1.2, pch = 16)
#dev.off()
Plot significant effects per covariate (q<0.01)
testResults <- decideTests(glm, method="hierarchical",adjust.method="BH", p.value=0.01)[,-1]
significantGenes <- sapply(1:ncol(testResults), function(j){
c <- glm$coefficients[testResults[,j]!=0,j+1]
table(cut(c, breaks=c(-5,seq(-1.5,1.5,l=7),5)))
})
colnames(significantGenes) <- colnames(testResults)
rownames(tab)<-c(1:nrow(tab))
tab2<- as.data.frame(tab)
tab2$gene<-as.character(as.character(tab2$gene))
tab2$final.molec<-as.character(as.character(tab2$final.molec))
tab2$TET2<-as.character(as.character(tab2$TET2))
tab2$RHOA<-as.character(as.character(tab2$RHOA))
tab2$IDH2<-as.character(as.character(tab2$IDH2))
tab2$DNMT3A<-as.character(as.character(tab2$DNMT3A))
# pdf("Figure_2c.pdf", width = 10, height = 7)
par(mfrow=c(1,1))
par(mar=c(8,8,5,5), xpd=F)
par(mfrow=c(1,1))
par(bty="n", mgp = c(2.5,.33,0), mar=c(5,5.5,5,0)+.1, las=2, tcl=-.25)
b <- barplot(significantGenes, las=2, ylab = "Differentially expressed genes", col=brewer.pal(8,"RdYlBu"), legend.text=FALSE , border=0, xaxt="n", cex.lab=1.5)#, col = set1[simple.annot[names(n)]], border=NA)
rotatedLabel(x0=b-0.1, y0=rep(-0.5, ncol(significantGenes)), labels=colnames(significantGenes), cex=1.2, srt=45, font=ifelse(grepl("[[:lower:]]", colnames(design))[-1], 1,3), col=colMutations)
rotatedLabel(b-0.1, colSums(significantGenes), colSums(significantGenes), pos=3, cex=, srt=45)#dev.off()
clip(0,30,0,1000)
x0 <- 7.5
image(x=x0+c(0,0.8), y=par("usr")[4]+seq(-1,1,l=9) -4, z=matrix(1:8, ncol=8), col=brewer.pal(8,"RdYlBu"), add=TRUE)
text(x=x0+1.1, y=par("usr")[4]+c(-1,0,1) -4, format(seq(-1,1,l=3),2), cex=0.66)
lines(x=rep(x0+0.9, 2), y=par("usr")[4]+c(-1,1) -4)
segments(x0+0.9,par("usr")[4] + 1-4,x0+0.95,par("usr")[4] + 1-4)
segments(x0+0.9,par("usr")[4] + 0-4,x0+0.95,par("usr")[4] + 0-4)
segments(x0+0.9,par("usr")[4] + -1-4,x0+0.95,par("usr")[4] + -1-4)
text(x0 + 0.45, par("usr")[4] + 1.5-4, "log2 FC", cex=.66)
#dev.off()
# par(bty="n", mgp = c(2.5,.33,0), mar=c(3,3.3,3,0)+.1, las=1, tcl=-.25)
# t <- table(rowSums(abs(testResults[,1:6])))
# b <- barplot(t[-1],ylab="Differentially expressed genes", col=rev(brewer.pal(7, "Spectral")[-(4:5)]), border=NA)
# rotatedLabel(b-0.1, t[-1], t[-1], pos=3, cex=1, srt=45)
# title(xlab="Associated drivers", line=2)
Print the list of differently expressed genes using the Ensembl annotation
select_hist<- pts.info.data[pts.info.data$final.molec == "AITL" | pts.info.data$final.molec == "PTCL.nos",]
gene<- as.data.frame(testResults)
sig_genes<- gene[gene$final.molec!= 0 |gene$IDH2 != 0 | gene$TET2 != 0 | gene$DNMT3A != 0 | gene$RHOA != 0,]
list_genes<-sort(rownames(sig_genes)) ##### list of signficiant genes
geneannotation1 <- getBM( attributes = c("ensembl_transcript_id", "entrezgene", "external_gene_name"), filters = "entrezgene", values = gsub("_at","",list_genes), mart = ensembl)
sort(unique(geneannotation1$external_gene_name))
[1] "ADRA2A" "AL441992.1" "ARHGEF10" "C3" "COL4A4" "DZIP1" "EFNB2" "HS3ST3A1" "ID2" "NETO2" "OSMR" "PRRX1" "ROBO1"
[14] "SLC5A3" "XKR4" "YAP1"
Generate a heatmap with AITL, PTCL-NOS with the extracted differentially expressed genes.
gep<- geneExpr[,select_hist$sample.nameNEW]
mat<- gep[list_genes,]
setdiff(rownames(mat), paste0(unique(geneannotation1$entrezgene),"_at"))
character(0)
for (ii in 1:nrow(mat)) {
#if(length (which (paste0(unique(geneannotation1$entrezgene),"_at") == rownames(mat)[ii])) != 0 ) rownames(mat) [ii] = geneannotation1$external_gene_name [ which (paste0(unique(geneannotation1$entrezgene),"_at") == rownames(mat)[ii])]
rownames(mat) [ii] = unique(geneannotation1$external_gene_name) [ which (paste0(unique(geneannotation1$entrezgene),"_at") == rownames(mat)[ii])]
}
mycol= c("red","white","yellow")
mylabel = select_hist[,c("sample.nameNEW","final.molec","IDH2","RHOA","TET2","DNMT3A")]
rownames(mylabel) = mylabel$sample.nameNEW
mylabel$sample.nameNEW = NULL
mylabel.nocol = mylabel
mylabel.col = mylabel
mylabel.col[is.na(mylabel.col)]<-0
#head(mylabel.col)
mylabel.col$final.molec[mylabel.col$final.molec == "AITL"] = "black"; mylabel.col$final.molec[mylabel.col$final.molec == "PTCL.nos"] = "orange"
for (a in 2:5) mylabel.col[,a] = factor(mylabel.col[,a], levels = levels(as.factor(mylabel.col[,a])), labels = mycol )
mat <- mat - rowMeans(mat)
par(mfrow=c(1,1))
cluster.pts.nr = pheatmap(mat, annotation_col = mylabel.nocol, annotation_colors = list(final.molec = c(AITL = "black", PTCL.nos = "orange"), filename= "x.pdf",
IDH2 = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]),
RHOA = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]),
TET2 = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]),
DNMT3A = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]) ) , show_colnames = F, cellheight = 15,
border_color= NA, color = colorRampPalette(rev(brewer.pal(n = 5 , name = "RdYlBu")))(20), scale = "row", clustering_method = "ward.D2",clustering_distance_cols = "euclidean" , silent = F)
### export pts order
cluster.pts.nr$tree_col$labels [cluster.pts.nr$tree_col$order]
[1] "PTCL.nos..23" "PTCL.nos..428" "PTCL.nos..448" "PTCL.nos..124" "PTCL.nos..247" "PTCL.nos..463" "PTCL.nos..89" "PTCL.nos..156" "PTCL.nos..432" "PTCL.nos..216" "PTCL.nos..25"
[12] "PTCL.nos..87" "PTCL.nos..94" "PTCL.nos..98" "PTCL.nos..105" "PTCL.nos..93" "PTCL.nos..195" "AITL..413" "PTCL.nos..531" "PTCL.nos..143" "PTCL.nos..46" "PTCL.nos..28"
[23] "PTCL.nos..185" "PTCL.nos..416" "PTCL.nos..112" "PTCL.nos..424" "PTCL.nos..134" "PTCL.nos..32" "PTCL.nos..22" "PTCL.nos..194" "PTCL.nos..30" "PTCL.nos..211" "PTCL.nos..52"
[34] "PTCL.nos..97" "PTCL.nos..201" "PTCL.nos..27" "PTCL.nos..68" "PTCL.nos..139" "PTCL.nos..72" "PTCL.nos..120" "PTCL.nos..444" "PTCL.nos..24" "PTCL.nos..15" "PTCL.nos..109"
[45] "PTCL.nos..29" "PTCL.nos..100" "PTCL.nos..171" "PTCL.nos..104" "PTCL.nos..99" "PTCL.nos..126" "PTCL.nos..258" "AITL..536" "PTCL.nos..535" "PTCL.nos..20" "PTCL.nos..102"
[56] "PTCL.nos..452" "PTCL.nos..529" "PTCL.nos..90" "PTCL.nos..230" "PTCL.nos..231" "PTCL.nos..232" "PTCL.nos..236" "PTCL.nos..16" "PTCL.nos..189" "PTCL.nos..506" "PTCL.nos..519"
[67] "PTCL.nos..151" "PTCL.nos..186" "AITL..419" "PTCL.nos..455" "PTCL.nos..47" "PTCL.nos..213" "PTCL.nos..161" "PTCL.nos..61" "PTCL.nos..209" "PTCL.nos..119" "PTCL.nos..80"
[78] "PTCL.nos..34" "PTCL.nos..440" "AITL..19" "AITL..18" "PTCL.nos..504" "PTCL.nos..118" "PTCL.nos..293" "PTCL.nos..92" "PTCL.nos..251" "PTCL.nos..101" "PTCL.nos..446"
[89] "AITL..479" "PTCL.nos..469" "AITL..411" "AITL..473" "AITL..472" "AITL..481" "AITL..487" "PTCL.nos..434" "PTCL.nos..180" "PTCL.nos..135" "PTCL.nos..445"
[100] "PTCL.nos..408" "PTCL.nos..409" "PTCL.nos..460" "PTCL.nos..468" "PTCL.nos..470" "PTCL.nos..471" "PTCL.nos..441" "PTCL.nos..451" "AITL..12" "PTCL.nos..237" "AITL..165"
[111] "PTCL.nos..178" "AITL..458" "AITL..191" "AITL..163" "AITL..187" "AITL..62" "PTCL.nos..249" "AITL..250" "AITL..257" "AITL..110" "AITL..260"
[122] "AITL..60" "AITL..77" "AITL..84" "AITL..106" "AITL..74" "AITL..133" "AITL..113" "AITL..127" "AITL..44" "AITL..82" "AITL..197"
[133] "AITL..223" "AITL..17" "AITL..523" "AITL..530" "AITL..154" "AITL..45" "AITL..505" "AITL..2" "AITL..238" "AITL..11" "AITL..259"
[144] "AITL..10" "AITL..234" "AITL..435" "PTCL.nos..239" "AITL..6" "AITL..438" "AITL..518" "AITL..532" "AITL..256" "AITL..449" "AITL..129"
[155] "AITL..9" "AITL..450" "AITL..534" "AITL..66" "AITL..255" "AITL..207" "AITL..8" "AITL..1" "AITL..152" "AITL..229" "AITL..248"
[166] "AITL..235" "AITL..420" "AITL..483" "AITL..157" "AITL..179" "AITL..67" "AITL..70" "AITL..225" "AITL..71" "AITL..58" "AITL..78"
[177] "AITL..137" "AITL..206" "AITL..459" "AITL..144" "AITL..222" "PTCL.nos..294" "AITL..198" "AITL..453" "AITL..210" "AITL..26" "AITL..114"
[188] "PTCL.nos..226" "AITL..51" "AITL..224" "AITL..69" "AITL..123" "AITL..221" "AITL..150" "AITL..43" "AITL..153" "AITL..520" "AITL..159"
[199] "AITL..79" "AITL..204" "AITL..214" "PTCL.nos..246" "AITL..167" "AITL..190" "PTCL.nos..289" "PTCL.nos..287" "PTCL.nos..288" "PTCL.nos..128" "PTCL.nos..136"
[210] "PTCL.nos..91" "PTCL.nos..86" "PTCL.nos..196" "PTCL.nos..212" "PTCL.nos..13" "PTCL.nos..96" "PTCL.nos..95" "PTCL.nos..208" "PTCL.nos..290" "PTCL.nos..115" "PTCL.nos..219"
[221] "AITL..484" "AITL..7" "AITL..457" "AITL..454" "PTCL.nos..200" "PTCL.nos..215" "PTCL.nos..252" "PTCL.nos..166" "PTCL.nos..218" "PTCL.nos..291" "PTCL.nos..292"
[232] "PTCL.nos..467" "PTCL.nos..482" "AITL..502" "AITL..515" "AITL..406" "PTCL.nos..162" "PTCL.nos..465" "PTCL.nos..203" "PTCL.nos..63" "PTCL.nos..240" "PTCL.nos..33"
[243] "AITL..510" "AITL..513" "AITL..147" "AITL..426" "PTCL.nos..527" "PTCL.nos..414" "PTCL.nos..415" "AITL..14" "AITL..456" "AITL..205" "AITL..55"
[254] "AITL..64" "AITL..199" "AITL..65" "PTCL.nos..3" "AITL..121" "AITL..517" "PTCL.nos..174" "PTCL.nos..524" "PTCL.nos..243" "PTCL.nos..242" "PTCL.nos..244"
[265] "AITL..392" "AITL..466" "AITL..4" "AITL..5" "PTCL.nos..241" "AITL..417" "AITL..461"
cluster.pts.nr$tree_col$labels
[1] "AITL..1" "AITL..10" "AITL..106" "AITL..11" "AITL..110" "AITL..113" "AITL..114" "AITL..12" "AITL..121" "AITL..123" "AITL..127"
[12] "AITL..129" "AITL..133" "AITL..137" "AITL..14" "AITL..144" "AITL..147" "AITL..150" "AITL..152" "AITL..153" "AITL..154" "AITL..157"
[23] "AITL..159" "AITL..163" "AITL..165" "AITL..167" "AITL..17" "AITL..179" "AITL..18" "AITL..187" "AITL..19" "AITL..190" "AITL..191"
[34] "AITL..197" "AITL..198" "AITL..199" "AITL..2" "AITL..204" "AITL..205" "AITL..206" "AITL..207" "AITL..210" "AITL..214" "AITL..221"
[45] "AITL..222" "AITL..223" "AITL..224" "AITL..225" "AITL..229" "AITL..234" "AITL..235" "AITL..238" "AITL..248" "AITL..250" "AITL..255"
[56] "AITL..256" "AITL..257" "AITL..259" "AITL..26" "AITL..260" "AITL..392" "AITL..4" "AITL..406" "AITL..411" "AITL..413" "AITL..417"
[67] "AITL..419" "AITL..420" "AITL..426" "AITL..43" "AITL..435" "AITL..438" "AITL..44" "AITL..449" "AITL..45" "AITL..450" "AITL..453"
[78] "AITL..454" "AITL..456" "AITL..457" "AITL..458" "AITL..459" "AITL..461" "AITL..466" "AITL..472" "AITL..473" "AITL..479" "AITL..481"
[89] "AITL..483" "AITL..484" "AITL..487" "AITL..5" "AITL..502" "AITL..505" "AITL..51" "AITL..510" "AITL..513" "AITL..515" "AITL..517"
[100] "AITL..518" "AITL..520" "AITL..523" "AITL..530" "AITL..532" "AITL..534" "AITL..536" "AITL..55" "AITL..58" "AITL..6" "AITL..60"
[111] "AITL..62" "AITL..64" "AITL..65" "AITL..66" "AITL..67" "AITL..69" "AITL..7" "AITL..70" "AITL..71" "AITL..74" "AITL..77"
[122] "AITL..78" "AITL..79" "AITL..8" "AITL..82" "AITL..84" "AITL..9" "PTCL.nos..100" "PTCL.nos..101" "PTCL.nos..102" "PTCL.nos..104" "PTCL.nos..105"
[133] "PTCL.nos..109" "PTCL.nos..112" "PTCL.nos..115" "PTCL.nos..118" "PTCL.nos..119" "PTCL.nos..120" "PTCL.nos..124" "PTCL.nos..126" "PTCL.nos..128" "PTCL.nos..13" "PTCL.nos..134"
[144] "PTCL.nos..135" "PTCL.nos..136" "PTCL.nos..139" "PTCL.nos..143" "PTCL.nos..15" "PTCL.nos..151" "PTCL.nos..156" "PTCL.nos..16" "PTCL.nos..161" "PTCL.nos..162" "PTCL.nos..166"
[155] "PTCL.nos..171" "PTCL.nos..174" "PTCL.nos..178" "PTCL.nos..180" "PTCL.nos..185" "PTCL.nos..186" "PTCL.nos..189" "PTCL.nos..194" "PTCL.nos..195" "PTCL.nos..196" "PTCL.nos..20"
[166] "PTCL.nos..200" "PTCL.nos..201" "PTCL.nos..203" "PTCL.nos..208" "PTCL.nos..209" "PTCL.nos..211" "PTCL.nos..212" "PTCL.nos..213" "PTCL.nos..215" "PTCL.nos..216" "PTCL.nos..218"
[177] "PTCL.nos..219" "PTCL.nos..22" "PTCL.nos..226" "PTCL.nos..23" "PTCL.nos..230" "PTCL.nos..231" "PTCL.nos..232" "PTCL.nos..236" "PTCL.nos..237" "PTCL.nos..239" "PTCL.nos..24"
[188] "PTCL.nos..240" "PTCL.nos..241" "PTCL.nos..242" "PTCL.nos..243" "PTCL.nos..244" "PTCL.nos..246" "PTCL.nos..247" "PTCL.nos..249" "PTCL.nos..25" "PTCL.nos..251" "PTCL.nos..252"
[199] "PTCL.nos..258" "PTCL.nos..27" "PTCL.nos..28" "PTCL.nos..287" "PTCL.nos..288" "PTCL.nos..289" "PTCL.nos..29" "PTCL.nos..290" "PTCL.nos..291" "PTCL.nos..292" "PTCL.nos..293"
[210] "PTCL.nos..294" "PTCL.nos..3" "PTCL.nos..30" "PTCL.nos..32" "PTCL.nos..33" "PTCL.nos..34" "PTCL.nos..408" "PTCL.nos..409" "PTCL.nos..414" "PTCL.nos..415" "PTCL.nos..416"
[221] "PTCL.nos..424" "PTCL.nos..428" "PTCL.nos..432" "PTCL.nos..434" "PTCL.nos..440" "PTCL.nos..441" "PTCL.nos..444" "PTCL.nos..445" "PTCL.nos..446" "PTCL.nos..448" "PTCL.nos..451"
[232] "PTCL.nos..452" "PTCL.nos..455" "PTCL.nos..46" "PTCL.nos..460" "PTCL.nos..463" "PTCL.nos..465" "PTCL.nos..467" "PTCL.nos..468" "PTCL.nos..469" "PTCL.nos..47" "PTCL.nos..470"
[243] "PTCL.nos..471" "PTCL.nos..482" "PTCL.nos..504" "PTCL.nos..506" "PTCL.nos..519" "PTCL.nos..52" "PTCL.nos..524" "PTCL.nos..527" "PTCL.nos..529" "PTCL.nos..531" "PTCL.nos..535"
[254] "PTCL.nos..61" "PTCL.nos..63" "PTCL.nos..68" "PTCL.nos..72" "PTCL.nos..80" "PTCL.nos..86" "PTCL.nos..87" "PTCL.nos..89" "PTCL.nos..90" "PTCL.nos..91" "PTCL.nos..92"
[265] "PTCL.nos..93" "PTCL.nos..94" "PTCL.nos..95" "PTCL.nos..96" "PTCL.nos..97" "PTCL.nos..98" "PTCL.nos..99"
#pheatmap::pheatmap(test, filename="test.pdf")
LOOCV on AILT, PTCLnos based on 16-gene model
y = t(mat)
cl.orig = c()
for (u in 1:nrow(y)) cl.orig [u] = unlist(strsplit(rownames(y)[u],"\\."))[1]
perm.mother = rownames(y)
perm.son = combn (perm.mother, length(perm.mother)-1)
output <- cbind(perm.mother, NA)
for (i in 1:length(perm.mother)) {
train <- y [ perm.son[,i], ]
test <- y [ ! ( rownames(y) %in% perm.son[,i]) , ]
cl <- cl.orig [which(rownames(y)%in%perm.son[,i])]
z <- lda(train, cl)
p <- predict(z,test)$class
output [ setdiff(1:271, which( rownames(y) %in% perm.son[,i]) ) , 2 ] = as.character(p)
# output [ output[,1] == rownames(test) , 3 ] = z$scaling [1,1]
# output [ output[,1] == rownames(test) , 4 ] = z$scaling [2,1]
# output [ output[,1] == rownames(test) , 5 ] = z$scaling [3,1]
}
colnames(output) = c("true","LOOCV.predicted")
output = as.data.frame(output)
output$true.class = cl.orig
table(output$true.class, output$LOOCV.predicted )
AITL PTCL
AITL 106 21
PTCL 16 128
confusionMatrix(table(output$true.class, output$LOOCV.predicted ))
Confusion Matrix and Statistics
AITL PTCL
AITL 106 21
PTCL 16 128
Accuracy : 0.8635
95% CI : (0.8168, 0.902)
No Information Rate : 0.5498
P-Value [Acc > NIR] : <2e-16
Kappa : 0.7252
Mcnemar's Test P-Value : 0.5108
Sensitivity : 0.8689
Specificity : 0.8591
Pos Pred Value : 0.8346
Neg Pred Value : 0.8889
Prevalence : 0.4502
Detection Rate : 0.3911
Detection Prevalence : 0.4686
Balanced Accuracy : 0.8640
'Positive' Class : AITL
# Confusion Matrix and Statistics
#
#
# AITL PTCL
# AITL 106 21
# PTCL 16 128
#
# Accuracy : 0.8635
# 95% CI : (0.8168, 0.902)
# No Information Rate : 0.5498
# P-Value [Acc > NIR] : <2e-16
#
# Kappa : 0.7252
# Mcnemar's Test P-Value : 0.5108
#
# Specificity : 0.8591
# Pos Pred Value : 0.8346
# Neg Pred Value : 0.8889
# Prevalence : 0.4502
# Detection Rate : 0.3911
# Detection Prevalence : 0.4686
# Balanced Accuracy : 0.8640
#
# 'Positive' Class : AITL
Use ConsensusClusterPlus to extract most significant clusters: analyze sample stratification based on the extracted differentially expressed genes betwee AILT and PTCL-nos and the ALCL ALK-negative 3-gene model.
select_hist<- pts.info.data[pts.info.data$final.molec == "AITL" | pts.info.data$final.molec == "PTCL.nos" | pts.info.data$final.molec == "ALCL.neg",]
# Add three classifier genes for ALCL ALK-neg [Agnelli et al, Blood, 2012]
# Check on array
anaplastic_gene<- c("TNFRSF8","BATF3","TMOD1")
geneannotation2 <- getBM( attributes = c("entrezgene", "external_gene_name"), filters = "external_gene_name", values = anaplastic_gene, mart = ensembl )
anaplastic_gene_ARRAY<- paste0(geneannotation2$entrezgene, "_at")
# Append 16-gene model to 3-gene model
list_genes_all<- c(list_genes, anaplastic_gene_ARRAY)
# Redo consensus cluster analysis
gep<- geneExpr[,select_hist$sample.nameNEW]
mat<- gep[list_genes_all,]
title=tempdir()
d<- data.matrix(mat)
d = sweep(d,1, apply(d,1,median,na.rm=T))
results = ConsensusClusterPlus(d,maxK=8,
pFeature=1,
title=title,
clusterAlg="hc",
innerLinkage="ward.D2",
finalLinkage="ward.D2",
distance="euclidean",
seed=123456789)
end fraction
kk<- as.data.frame((results[[5]]$consensusClass)) ##### 4 significant cluster
kk$geo.id<- rownames(kk)
colnames(kk)[1]<- "cluster"
table(kk$cluster)
1 2 3 4 5
87 103 32 63 55
Plot heatmap AITL, PTCL-NOS, ALCL-neg and the 19-gene model
heat<- merge(t(mat), kk, by.x = 0, by.y="geo.id")
heat2<- merge(heat, pts.info.data, by.x = 1, by.y="sample.nameNEW")
heat2<- heat2[order(heat2$cluster),]
mycol= c("red","white","yellow")
mylabel = heat2[,c("Row.names","cluster","final.molec","TET2","RHOA","IDH2","DNMT3A")]
colnames(mylabel)<- c("sample.names","clusters","Histology","TET2","RHOA","IDH2","DNMT3A")
rownames(mylabel) = mylabel$sample.names
mylabel$sample.names = NULL
mylabel.nocol = mylabel
mylabel.col = mylabel
mylabel.col[is.na(mylabel.col)]<-0
#head(mylabel.col)
mylabel.col$Histology[mylabel.col$Histology == "AITL"] = "black"; mylabel.col$Histology[mylabel.col$Histology == "PTCL.nos"] = "orange"; mylabel.col$Histology[mylabel.col$Histology == "ALCL.neg"] = "yellow"
for (a in c(3:6)) mylabel.col[,a] = factor(mylabel.col[,a], levels = levels(as.factor(mylabel.col[,a])), labels = mycol )
mycol_plus<- c(brewer.pal(11,"Paired"),brewer.pal(6,"Set2"))
for (a in 1) mylabel.col[,a] = factor(mylabel.col[,a], levels = levels(as.factor(mylabel.col[,a])), labels = mycol_plus[1:5] )
mylabel.nocol$clusters<-as.numeric(as.character(mylabel.nocol$clusters))
mylabel.nocol$clusters<-as.character(paste("cluster",mylabel.nocol$clusters, sep=""))
par(mfrow=c(1,1))
par(mar=c(5,5,5,5), xpd=F)
mat3<- t(data.matrix(heat2[,2:20]))
colnames(mat3)<-heat2$Row.names
mat3= mat3[order(rownames(mat3)),]
temp_name = getBM( attributes = c("ensembl_transcript_id", "entrezgene", "external_gene_name"), filters = "entrezgene", values = gsub("_at","",rownames(mat3)), mart = ensembl)[,c(2:3)]
temp_name = temp_name[!duplicated(temp_name[,1]),]
rownames(mat3) = temp_name$external_gene_name
mat3 <- mat3 - rowMeans(mat3)
par(mfrow=c(1,1))
pheatmap(mat3, annotation_col = mylabel.nocol, annotation_colors = list(clusters = c(cluster1= mycol_plus[1], cluster2 = mycol_plus[2], cluster3 = mycol_plus[3], cluster4 = mycol_plus[4], cluster5 = mycol_plus[5]), Histology = c(AITL = "black", PTCL.nos = "orange", ALCL.neg= "yellow"), IDH2 = c(MUT=mycol[1], "NA"=mycol[2],WT=mycol[3]), RHOA = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]), TET2 = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]), DNMT3A = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]) ) , border_color= NA, color = colorRampPalette(rev(brewer.pal(n = 5 , name = "RdYlBu")))(100), scale = "row", cluster_cols = FALSE, show_colnames= F, row_annotation =3, cellheight = 20)
#dev.off()
gep<- geneExpr[,select_hist$sample.nameNEW]
mat<- gep[list_genes_all,]
geneannotation1 <- getBM( attributes = c("ensembl_transcript_id", "entrezgene", "external_gene_name"), filters = "entrezgene", values = gsub("_at","",list_genes_all), mart = ensembl)
sort(unique(geneannotation1$external_gene_name))
[1] "ADRA2A" "AL441992.1" "ARHGEF10" "BATF3" "C3" "COL4A4" "DZIP1" "EFNB2" "HS3ST3A1" "ID2" "NETO2" "OSMR" "PRRX1"
[14] "ROBO1" "SLC5A3" "TMOD1" "TNFRSF8" "XKR4" "YAP1"
setdiff(rownames(mat), paste0(unique(geneannotation1$entrezgene),"_at"))
character(0)
for (ii in 1:nrow(mat)) {
#if(length (which (paste0(unique(geneannotation1$entrezgene),"_at") == rownames(mat)[ii])) != 0 ) rownames(mat) [ii] = geneannotation1$external_gene_name [ which (paste0(unique(geneannotation1$entrezgene),"_at") == rownames(mat)[ii])]
rownames(mat) [ii] = unique(geneannotation1$external_gene_name) [ which (paste0(unique(geneannotation1$entrezgene),"_at") == rownames(mat)[ii])]
}
mycol= c("red","white","yellow")
mylabel = select_hist[,c("sample.nameNEW","final.molec","IDH2","RHOA","TET2","DNMT3A")]
rownames(mylabel) = mylabel$sample.nameNEW
mylabel$sample.nameNEW = NULL
mylabel.nocol = mylabel
mylabel.col = mylabel
mylabel.col[is.na(mylabel.col)]<-0
#head(mylabel.col)
mylabel.col$final.molec[mylabel.col$final.molec == "AITL"] = "black"; mylabel.col$final.molec[mylabel.col$final.molec == "PTCL.nos"] = "orange"; mylabel.col$final.molec[mylabel.col$final.molec == "ALCL.neg"] = "yellow"
for (a in 2:5) mylabel.col[,a] = factor(mylabel.col[,a], levels = levels(as.factor(mylabel.col[,a])), labels = mycol )
mat <- mat - rowMeans(mat)
par(mfrow=c(1,1))
pheatmap(mat, annotation_col = mylabel.nocol, annotation_colors = list(final.molec = c(AITL = "black", PTCL.nos = "orange", ALCL.neg = "yellow"), filename= "x.pdf",
IDH2 = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]),
RHOA = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]),
TET2 = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]),
DNMT3A = c(MUT=mycol[1],"NA"=mycol[2],WT=mycol[3]) ) , show_colnames = F, cellheight = 15,
border_color= NA, color = colorRampPalette(rev(brewer.pal(n = 5 , name = "RdYlBu")))(20), scale = "row", clustering_method = "ward.D2",clustering_distance_cols = "euclidean" , silent = F)
### export pts order
cluster.pts.nr$tree_col$labels [cluster.pts.nr$tree_col$order]
[1] "PTCL.nos..23" "PTCL.nos..428" "PTCL.nos..448" "PTCL.nos..124" "PTCL.nos..247" "PTCL.nos..463" "PTCL.nos..89" "PTCL.nos..156" "PTCL.nos..432" "PTCL.nos..216" "PTCL.nos..25"
[12] "PTCL.nos..87" "PTCL.nos..94" "PTCL.nos..98" "PTCL.nos..105" "PTCL.nos..93" "PTCL.nos..195" "AITL..413" "PTCL.nos..531" "PTCL.nos..143" "PTCL.nos..46" "PTCL.nos..28"
[23] "PTCL.nos..185" "PTCL.nos..416" "PTCL.nos..112" "PTCL.nos..424" "PTCL.nos..134" "PTCL.nos..32" "PTCL.nos..22" "PTCL.nos..194" "PTCL.nos..30" "PTCL.nos..211" "PTCL.nos..52"
[34] "PTCL.nos..97" "PTCL.nos..201" "PTCL.nos..27" "PTCL.nos..68" "PTCL.nos..139" "PTCL.nos..72" "PTCL.nos..120" "PTCL.nos..444" "PTCL.nos..24" "PTCL.nos..15" "PTCL.nos..109"
[45] "PTCL.nos..29" "PTCL.nos..100" "PTCL.nos..171" "PTCL.nos..104" "PTCL.nos..99" "PTCL.nos..126" "PTCL.nos..258" "AITL..536" "PTCL.nos..535" "PTCL.nos..20" "PTCL.nos..102"
[56] "PTCL.nos..452" "PTCL.nos..529" "PTCL.nos..90" "PTCL.nos..230" "PTCL.nos..231" "PTCL.nos..232" "PTCL.nos..236" "PTCL.nos..16" "PTCL.nos..189" "PTCL.nos..506" "PTCL.nos..519"
[67] "PTCL.nos..151" "PTCL.nos..186" "AITL..419" "PTCL.nos..455" "PTCL.nos..47" "PTCL.nos..213" "PTCL.nos..161" "PTCL.nos..61" "PTCL.nos..209" "PTCL.nos..119" "PTCL.nos..80"
[78] "PTCL.nos..34" "PTCL.nos..440" "AITL..19" "AITL..18" "PTCL.nos..504" "PTCL.nos..118" "PTCL.nos..293" "PTCL.nos..92" "PTCL.nos..251" "PTCL.nos..101" "PTCL.nos..446"
[89] "AITL..479" "PTCL.nos..469" "AITL..411" "AITL..473" "AITL..472" "AITL..481" "AITL..487" "PTCL.nos..434" "PTCL.nos..180" "PTCL.nos..135" "PTCL.nos..445"
[100] "PTCL.nos..408" "PTCL.nos..409" "PTCL.nos..460" "PTCL.nos..468" "PTCL.nos..470" "PTCL.nos..471" "PTCL.nos..441" "PTCL.nos..451" "AITL..12" "PTCL.nos..237" "AITL..165"
[111] "PTCL.nos..178" "AITL..458" "AITL..191" "AITL..163" "AITL..187" "AITL..62" "PTCL.nos..249" "AITL..250" "AITL..257" "AITL..110" "AITL..260"
[122] "AITL..60" "AITL..77" "AITL..84" "AITL..106" "AITL..74" "AITL..133" "AITL..113" "AITL..127" "AITL..44" "AITL..82" "AITL..197"
[133] "AITL..223" "AITL..17" "AITL..523" "AITL..530" "AITL..154" "AITL..45" "AITL..505" "AITL..2" "AITL..238" "AITL..11" "AITL..259"
[144] "AITL..10" "AITL..234" "AITL..435" "PTCL.nos..239" "AITL..6" "AITL..438" "AITL..518" "AITL..532" "AITL..256" "AITL..449" "AITL..129"
[155] "AITL..9" "AITL..450" "AITL..534" "AITL..66" "AITL..255" "AITL..207" "AITL..8" "AITL..1" "AITL..152" "AITL..229" "AITL..248"
[166] "AITL..235" "AITL..420" "AITL..483" "AITL..157" "AITL..179" "AITL..67" "AITL..70" "AITL..225" "AITL..71" "AITL..58" "AITL..78"
[177] "AITL..137" "AITL..206" "AITL..459" "AITL..144" "AITL..222" "PTCL.nos..294" "AITL..198" "AITL..453" "AITL..210" "AITL..26" "AITL..114"
[188] "PTCL.nos..226" "AITL..51" "AITL..224" "AITL..69" "AITL..123" "AITL..221" "AITL..150" "AITL..43" "AITL..153" "AITL..520" "AITL..159"
[199] "AITL..79" "AITL..204" "AITL..214" "PTCL.nos..246" "AITL..167" "AITL..190" "PTCL.nos..289" "PTCL.nos..287" "PTCL.nos..288" "PTCL.nos..128" "PTCL.nos..136"
[210] "PTCL.nos..91" "PTCL.nos..86" "PTCL.nos..196" "PTCL.nos..212" "PTCL.nos..13" "PTCL.nos..96" "PTCL.nos..95" "PTCL.nos..208" "PTCL.nos..290" "PTCL.nos..115" "PTCL.nos..219"
[221] "AITL..484" "AITL..7" "AITL..457" "AITL..454" "PTCL.nos..200" "PTCL.nos..215" "PTCL.nos..252" "PTCL.nos..166" "PTCL.nos..218" "PTCL.nos..291" "PTCL.nos..292"
[232] "PTCL.nos..467" "PTCL.nos..482" "AITL..502" "AITL..515" "AITL..406" "PTCL.nos..162" "PTCL.nos..465" "PTCL.nos..203" "PTCL.nos..63" "PTCL.nos..240" "PTCL.nos..33"
[243] "AITL..510" "AITL..513" "AITL..147" "AITL..426" "PTCL.nos..527" "PTCL.nos..414" "PTCL.nos..415" "AITL..14" "AITL..456" "AITL..205" "AITL..55"
[254] "AITL..64" "AITL..199" "AITL..65" "PTCL.nos..3" "AITL..121" "AITL..517" "PTCL.nos..174" "PTCL.nos..524" "PTCL.nos..243" "PTCL.nos..242" "PTCL.nos..244"
[265] "AITL..392" "AITL..466" "AITL..4" "AITL..5" "PTCL.nos..241" "AITL..417" "AITL..461"
##### cibersort and origical molecular histologies
load("./Rmd.files/cibersort.all.Rdata")
ciber_all<-as.data.frame.matrix(t(cibersort.percentages))
ciber_all$sample.nameNEW <- rownames(ciber_all)
colnames(kk)[2]<-"sample.nameNEW"
require(plyr)
final <-join(ciber_all, kk, by = "sample.nameNEW", type="left")
final2<-merge(pts.info.data[,c(1,6,14:17)], final, by="sample.nameNEW")
final3<- subset(final2, final.molec %in% c("AITL","ALCL.neg","ALCL.pos","ATLL","NKT","PTCL.nos"))
final3<- final3[order(final3$final.molec),]
library(RColorBrewer)
n <- 22
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
par(mar=c(2,5,7,10), xpd=TRUE)
x<- barplot(t(final3[7:28]), names.arg = rep("", length(final3$final.molec)), cex.names = 0.7, col=col_vector, border=NA,
space=rep(0, nrow(final3)))
legend("topright",legend=colnames(final3)[7:28], col=col_vector, pch=c(15), inset=c(-0.11,0), pt.cex= 1,
cex = 1, bty = "n", x.intersp = 0.7)
names_hist<- unique(final3$final.molec)
col_hist<- c("orange","yellow","dodgerblue2","brown2","darkorchid1","black")
num<- as.numeric(table(final3$final.molec))
for(i in (1:length(num)))
{
segments(x[sum(num[1:i])+1-num[i]], 1.05,x[sum(num[1:i])],1.05,lwd=4, col=col_hist[i])
text(x[(sum(num[1:i])-num[i] +1+ sum(num[1:i]))/2], 1.1, names_hist[i], cex=1.2, srt=0)
}
##### plot cibersort profile of patients stratified according to histology and clusters
for(i in (1:nrow(final3)))
{
final3$cluster[i][is.na(final3$cluster[i])]<- final3$final.molec[i]
}
final3$cluster <- factor(final3$cluster, levels = c( "1","2","4","NKT","3","5","ALCL.pos", "ATLL"))
final3<- final3[order(final3$cluster),]
#pdf("barplot_cibersort.pdf", width = 20, height = 7)
par(mar=c(2,5,7,10), xpd=TRUE)
x<- barplot(t(final3[7:28]), names.arg = rep("", length(final3$final.molec)), cex.names = 0.7, col=col_vector, border=NA,
space=rep(0, nrow(final3)))
legend("topright",legend=colnames(final3)[7:28], col=col_vector, pch=c(15), inset=c(-0.11,0), pt.cex= 1,
cex = 1, bty = "n", x.intersp = 0.7)
mycol_plus<- c(brewer.pal(11,"Paired"),brewer.pal(6,"Dark2"))
names_hist<- c("C-1","C-2", "C-4","NKT","C-3","C-5","ALCL.pos","ATLL")
col_hist<- c(mycol_plus[1],mycol_plus[2],mycol_plus[4],"darkorchid1",mycol_plus[3],mycol_plus[5],"dodgerblue2","brown2","")
num<- as.numeric(table(final3$cluster))
par(new=TRUE)
for(i in (1:(length(num))))
{
segments(x[sum(num[1:i])+1-num[i]], 1.05,x[sum(num[1:i])],1.05,lwd=4, col=col_hist[i])
text(x[(sum(num[1:i])-num[i] +1+ sum(num[1:i]))/2], 1.1, names_hist[i], cex=1.2, srt=0)
}
# dev.off()
Boxplot comparing the contribution of each cibersort signature between all extracted clusters
par(mfrow=c(1,2))
par(mar=c(3,3,3,3), xpd=F)
for(i in (7:27))
{
#pdf(sprintf("%s_cibersort_ptcl.pdf",i), height=8, width=10)
k<- as.numeric(final2[,i])
table_wilk<- pairwise.wilcox.test(k,final2$cluster,p.adjust.methods = "bonferroni" )$p.value
df_wilk <- data.frame(expand.grid(dimnames(table_wilk)),array(table_wilk))
df_wilk2<-na.omit(df_wilk)
df_wilk2_sig<- df_wilk2[df_wilk2$array.table_wilk.<0.05,]
df_wilk2_sig$Var1<-as.numeric(as.character(df_wilk2_sig$Var1))
df_wilk2_sig$Var2<-as.numeric(as.character(df_wilk2_sig$Var2))
if(nrow(df_wilk2_sig)>0)
{
boxplot(k~final2$cluster, ylim=c(0,(max(k)+0.2)), main=colnames(final2)[i], cex.main=2, col=mycol_plus, las=2)
for(j in (1:nrow(df_wilk2_sig)))
{
segments(df_wilk2_sig$Var1[j], max(k)-0.01+j/30, df_wilk2_sig$Var2[j],max(k)-0.01+j/30)
p<-df_wilk2_sig$array.table_wilk.[j]
if(p<0.00001){p2 = "<0.00001"}else{
p2<-as.numeric(formatC(p,digits=6,format="f"))}
pval <- paste("p =",p2,sep=" ")
text((df_wilk2_sig$Var1[j]+ df_wilk2_sig$Var2[j])/1.9, max(k) +j/30, pval, cex=0.8)
}
}
#dev.off()
}
# for convenience: reimport annotated matrix
final<- read.delim("./Rmd.files/aitl_nos_alcl_clsutering.txt",sep="\t",header = T,stringsAsFactors = F)
final2<- final[,c("Row.names","hist","cluster")]
mat<- read.delim("./Rmd.files/ensembl_annotated_matrix.txt", sep="\t", stringsAsFactors = F)
design <- model.matrix(~ 0+factor(final2$cluster)) ##### create matrix
colnames(design)<-paste0("Cluster_",c(1:5))
contrast.matrix <- makeContrasts(Cluster_2-Cluster_1, Cluster_3-Cluster_1,Cluster_4-Cluster_1, Cluster_5-Cluster_1,
Cluster_3-Cluster_2, Cluster_4-Cluster_2, Cluster_5-Cluster_2,
Cluster_4-Cluster_3, Cluster_5-Cluster_3,
Cluster_4-Cluster_5,
Cluster_2-(Cluster_1 + Cluster_3 + Cluster_4 + Cluster_5)/4,
Cluster_3-(Cluster_1 + Cluster_2 + Cluster_4 + Cluster_5)/4,
Cluster_4-(Cluster_1 + Cluster_2 + Cluster_3 + Cluster_5)/4,
Cluster_1-(Cluster_2 + Cluster_3 + Cluster_4 + Cluster_5)/4,
Cluster_5-(Cluster_2 + Cluster_3 + Cluster_4 + Cluster_1)/4,
levels=design)
fit1 <- lmFit(mat, design)
fit2 <- contrasts.fit(fit1, contrast.matrix)
fit <- eBayes(fit2)
geneExpr = adj.data
geneExpr2<- geneExpr[,colnames(geneExpr) %in% final2$Row.names ]
geneExpr2<- geneExpr2[,final2$Row.names]
ensembl = useMart( "ensembl", dataset = "hsapiens_gene_ensembl" )
hgnc <- getBM(attributes=c('entrezgene','hgnc_symbol','hgnc_id'),filters = 'entrezgene', values = gsub("_at","",rownames(geneExpr2)),mart = ensembl)
Batch submitting query [========>-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------] 5% eta: 8s
Batch submitting query [=============>------------------------------------------------------------------------------------------------------------------------------------------------------------------------------] 7% eta: 9s
Batch submitting query [=================>--------------------------------------------------------------------------------------------------------------------------------------------------------------------------] 10% eta: 9s
Batch submitting query [======================>---------------------------------------------------------------------------------------------------------------------------------------------------------------------] 12% eta: 13s
Batch submitting query [===========================>----------------------------------------------------------------------------------------------------------------------------------------------------------------] 15% eta: 12s
Batch submitting query [===============================>------------------------------------------------------------------------------------------------------------------------------------------------------------] 17% eta: 11s
Batch submitting query [====================================>-------------------------------------------------------------------------------------------------------------------------------------------------------] 20% eta: 11s
Batch submitting query [========================================>---------------------------------------------------------------------------------------------------------------------------------------------------] 22% eta: 11s
Batch submitting query [=============================================>----------------------------------------------------------------------------------------------------------------------------------------------] 24% eta: 10s
Batch submitting query [=================================================>------------------------------------------------------------------------------------------------------------------------------------------] 27% eta: 10s
Batch submitting query [======================================================>-------------------------------------------------------------------------------------------------------------------------------------] 29% eta: 10s
Batch submitting query [===========================================================>--------------------------------------------------------------------------------------------------------------------------------] 32% eta: 10s
Batch submitting query [===============================================================>----------------------------------------------------------------------------------------------------------------------------] 34% eta: 10s
Batch submitting query [====================================================================>-----------------------------------------------------------------------------------------------------------------------] 37% eta: 9s
Batch submitting query [========================================================================>-------------------------------------------------------------------------------------------------------------------] 39% eta: 9s
Batch submitting query [=============================================================================>--------------------------------------------------------------------------------------------------------------] 41% eta: 8s
Batch submitting query [==================================================================================>---------------------------------------------------------------------------------------------------------] 44% eta: 8s
Batch submitting query [======================================================================================>-----------------------------------------------------------------------------------------------------] 46% eta: 8s
Batch submitting query [===========================================================================================>------------------------------------------------------------------------------------------------] 49% eta: 7s
Batch submitting query [===============================================================================================>--------------------------------------------------------------------------------------------] 51% eta: 7s
Batch submitting query [====================================================================================================>---------------------------------------------------------------------------------------] 54% eta: 7s
Batch submitting query [========================================================================================================>-----------------------------------------------------------------------------------] 56% eta: 7s
Batch submitting query [=============================================================================================================>------------------------------------------------------------------------------] 59% eta: 6s
Batch submitting query [==================================================================================================================>-------------------------------------------------------------------------] 61% eta: 6s
Batch submitting query [======================================================================================================================>---------------------------------------------------------------------] 63% eta: 6s
Batch submitting query [===========================================================================================================================>----------------------------------------------------------------] 66% eta: 6s
Batch submitting query [===============================================================================================================================>------------------------------------------------------------] 68% eta: 5s
Batch submitting query [====================================================================================================================================>-------------------------------------------------------] 71% eta: 5s
Batch submitting query [=========================================================================================================================================>--------------------------------------------------] 73% eta: 5s
Batch submitting query [=============================================================================================================================================>----------------------------------------------] 76% eta: 4s
Batch submitting query [==================================================================================================================================================>-----------------------------------------] 78% eta: 4s
Batch submitting query [======================================================================================================================================================>-------------------------------------] 80% eta: 3s
Batch submitting query [===========================================================================================================================================================>--------------------------------] 83% eta: 3s
Batch submitting query [===============================================================================================================================================================>----------------------------] 85% eta: 3s
Batch submitting query [====================================================================================================================================================================>-----------------------] 88% eta: 2s
Batch submitting query [=========================================================================================================================================================================>------------------] 90% eta: 2s
Batch submitting query [=============================================================================================================================================================================>--------------] 93% eta: 1s
Batch submitting query [==================================================================================================================================================================================>---------] 95% eta: 1s
Batch submitting query [======================================================================================================================================================================================>-----] 98% eta: 0s
Batch submitting query [============================================================================================================================================================================================] 100% eta: 0s
geneExpr3<- as.data.frame.matrix(geneExpr2[which(rownames(geneExpr2) %in% paste0(hgnc$entrezgene,"_at")),])
levels_design<- c("Cluster_2-Cluster_1","Cluster_3-Cluster_1","Cluster_4-Cluster_1","Cluster_5-Cluster_1",
"Cluster_3-Cluster_2","Cluster_4-Cluster_2","Cluster_5-Cluster_2","Cluster_4-Cluster_3",
"Cluster_5-Cluster_3","Cluster_4-Cluster_5",
"Cluster_2-(Cluster_1 + Cluster_3 + Cluster_4 + Cluster_5)/4",
"Cluster_3-(Cluster_1 + Cluster_2 + Cluster_4 + Cluster_5)/4",
"Cluster_4-(Cluster_1 + Cluster_2 + Cluster_3 + Cluster_5)/4",
"Cluster_1-(Cluster_2 + Cluster_3 + Cluster_4 + Cluster_5)/4",
"Cluster_5-(Cluster_2 + Cluster_3 + Cluster_4 + Cluster_1)/4")
df_diff_all=NULL
for(i in (1:length(levels_design)))
{
tt <- topTable(fit, coef=i, number=Inf, genelist=rownames(geneExpr3))
tt$ID<- rownames(tt)
colnames(tt)[1]<-"GENE_SYMBOL"
head(tt, 10)
fg <- tt$GENE_SYMBOL[tt$adj.P.Val < 0.001 & abs( tt$logFC ) > 2]
length(fg)
df_diff<- cbind(fg, rep(levels_design[i], length(fg)))
df_diff_all<-rbind(df_diff_all, df_diff)
#plot(tt$logFC, -log10(tt$adj.P.Val))
}
df_diff_all<- as.data.frame.matrix(df_diff_all)
annotation_col<- final2
colnames(annotation_col)<-c("sampleID","Hist","cluster")
A <- function(x) (as.factor(as.character(x))) ##### lapply function for all columns to generate the relative contribution
annotation_col[,1:ncol(annotation_col)] = apply(annotation_col[,1:ncol(annotation_col)], 2, function(x) as.factor(as.character(x)))
annotation_col<- as.data.frame(annotation_col[,-1])
mycol_plus<- c(brewer.pal(11,"Paired"),brewer.pal(6,"Dark2"))
ann_colors = list(Hist=c( "AITL"="black","ALCL"="yellow","PTCL"="orange"),
cluster=c("1" = mycol_plus[1],"2" = mycol_plus[2],"3" = mycol_plus[3],"4" = mycol_plus[4],"5" =mycol_plus[5])
)
edata3<- mat[rownames(mat) %in% unique(df_diff_all$fg),]
pheatmap(as.matrix( as.matrix(edata3)), annotation_col=annotation_col, annotation_colors = ann_colors, border_color="NA", scale = "row", cluster_cols = FALSE, show_colnames= F, show_rownames = FALSE)
levels_design<- c("Cluster_2-Cluster_1","Cluster_3-Cluster_1","Cluster_4-Cluster_1","Cluster_5-Cluster_1",
"Cluster_3-Cluster_2","Cluster_4-Cluster_2","Cluster_5-Cluster_2","Cluster_4-Cluster_3",
"Cluster_5-Cluster_3","Cluster_4-Cluster_5",
"Cluster_2-(Cluster_1 + Cluster_3 + Cluster_4 + Cluster_5)/4",
"Cluster_3-(Cluster_1 + Cluster_2 + Cluster_4 + Cluster_5)/4",
"Cluster_4-(Cluster_1 + Cluster_2 + Cluster_3 + Cluster_5)/4",
"Cluster_1-(Cluster_2 + Cluster_3 + Cluster_4 + Cluster_5)/4",
"Cluster_5-(Cluster_2 + Cluster_3 + Cluster_4 + Cluster_1)/4")
df_diff_all=NULL
for(i in (1:length(levels_design)))
{
tt <- topTable(fit, coef=i, number=Inf, genelist=rownames(geneExpr3))
tt$ID<- rownames(tt)
colnames(tt)[1]<-"GENE_SYMBOL"
head(tt, 10)
fg <- tt$GENE_SYMBOL[tt$adj.P.Val < 0.001 & abs( tt$logFC ) > 2]
length(fg)
df_diff<- cbind(fg, rep(levels_design[i], length(fg)))
df_diff_all<-rbind(df_diff_all, df_diff)
#plot(tt$logFC, -log10(tt$adj.P.Val))
}
df_diff_all<- as.data.frame.matrix(df_diff_all)
table(df_diff_all$V2)
Cluster_1-(Cluster_2 + Cluster_3 + Cluster_4 + Cluster_5)/4 Cluster_3-(Cluster_1 + Cluster_2 + Cluster_4 + Cluster_5)/4 Cluster_3-Cluster_1
18 84 203
Cluster_3-Cluster_2 Cluster_4-Cluster_1 Cluster_4-Cluster_2
121 15 2
Cluster_4-Cluster_3 Cluster_4-Cluster_5 Cluster_5-(Cluster_2 + Cluster_3 + Cluster_4 + Cluster_1)/4
78 8 5
Cluster_5-Cluster_1 Cluster_5-Cluster_2 Cluster_5-Cluster_3
41 19 74
annotation_col<- final2
colnames(annotation_col)<-c("sampleID","Hist","cluster")
A <- function(x) (as.factor(as.character(x))) ##### lapply function for all columns to generate the relative contribution
annotation_col[,1:ncol(annotation_col)] = apply(annotation_col[,1:ncol(annotation_col)], 2, function(x) as.factor(as.character(x)))
annotation_col<- as.data.frame(annotation_col[,-1])
mycol_plus<- c(brewer.pal(11,"Paired"),brewer.pal(6,"Dark2"))
ann_colors = list(Hist=c( "AITL"="black","ALCL"="yellow","PTCL"="orange"),
cluster=c("1" = mycol_plus[1],"2" = mycol_plus[2],"3" = mycol_plus[3],"4" = mycol_plus[4],"5" =mycol_plus[5])
)
edata3<- mat[rownames(mat) %in% unique(df_diff_all$fg),]
pheatmap(as.matrix( as.matrix(edata3)), annotation_col=annotation_col, annotation_colors = ann_colors, border_color="NA",
scale = "row", cluster_cols = FALSE, show_colnames= F, show_rownames = FALSE)
############ table of genes
df_diff_all_tab=NULL
for(i in (1:length(levels_design)))
{
tt <- topTable(fit, coef=i, number=Inf, genelist=rownames(geneExpr3))
tt$ID<- rownames(tt)
colnames(tt)[1]<-"GENE_SYMBOL"
head(tt,10)
fg <- tt[tt$adj.P.Val < 0.001 & abs( tt$logFC ) > 2,]
if(nrow(fg)>0){
fg$design<- levels_design[i]
df_diff_all_tab<-rbind.data.frame(df_diff_all_tab, fg)
#plot(tt$logFC"," -log10(tt$adj.P.Val))
}
}
nrow(df_diff_all_tab) #### number of genes differentially expressed between C-1, C-2, C-3, C-4, C-5
[1] 668
##### list gene from Iqbal et al. blood 2014
iqbal<- unique(c("EFNB2","ROBO1","S1PR3","ANK2","LPAR1","SNAP91","SOX8","LPAR1","RAMP3","S1PR3","ROBO1","EFNB2","TUBB2B","SOX8",
"SOX8","ARHGEF10","DMRT1", "SLC19A21","STK3","PERP","TNFRSF8","TMOD1","BATF3","CDC14B","PERP","WDFEY3",
"TMOD1","ATP6V0D1","AXL","CD59","CHI3L1","CLTC","COL6A1","CREG1","CTSB","CTSC","NR1","H3","PDXK","PITPNA",
"PLSCR1","PRDX3","CTSS","CYBB","FABP3","FPR1","FTL","GUCA2A","HCK","IFI30","IL13RA1","JAK2","LILRB1",
"PRKG1PSAP","SLC7A7","SOD2","TCN2","THY1","TYR","UBE2L6","WARS","AXL","FTL","SIRPA","STAT1","CSF2","IFNG",
"SEPT6","GATA3","CD28","STAT1","AXL","CD28","CD40","CD59","CSF2","FTL","IFNG","LILRB1","SIRPA","TBX21",
"MSH6","EGR1","CAT","EGR1","CAT"))
intersect(iqbal, unique(df_diff_all_tab$GENE_SYMBOL))
[1] "ROBO1" "LPAR1" "SOX8" "TUBB2B" "TNFRSF8" "TMOD1" "BATF3" "ATP6V0D1" "CHI3L1" "CREG1" "CTSB" "CTSC" "FTL" "HCK"
fit1 <- lmFit(mat, design)
fit2 <- contrasts.fit(fit1, contrast.matrix)
fit <- eBayes(fit2)
res.l <- tmodLimmaTest(fit, rownames(mat))
length(res.l)
[1] 15
names(res.l)
[1] "Cluster_2 - Cluster_1" "Cluster_3 - Cluster_1" "Cluster_4 - Cluster_1"
[4] "Cluster_5 - Cluster_1" "Cluster_3 - Cluster_2" "Cluster_4 - Cluster_2"
[7] "Cluster_5 - Cluster_2" "Cluster_4 - Cluster_3" "Cluster_5 - Cluster_3"
[10] "Cluster_4 - Cluster_5" "Cluster_2 - (Cluster_1 + Cluster_3 + Cluster_4 + Cluster_5)/4" "Cluster_3 - (Cluster_1 + Cluster_2 + Cluster_4 + Cluster_5)/4"
[13] "Cluster_4 - (Cluster_1 + Cluster_2 + Cluster_3 + Cluster_5)/4" "Cluster_1 - (Cluster_2 + Cluster_3 + Cluster_4 + Cluster_5)/4" "Cluster_5 - (Cluster_2 + Cluster_3 + Cluster_4 + Cluster_1)/4"
pie <- tmodLimmaDecideTests(fit, genes=rownames(mat))
par(mfrow=c(1,1))
res.l2<- lapply(res.l, function(x) {x[x$adj.P.Val<10e-8,]})
tmodPanelPlot(res.l2, pie=pie, text.cex=0.6) ##### zero = grey, blue down in the first factor and red up in the first
res.l2<- lapply(res.l, function(x) {x[x$adj.P.Val>10e-8 & x$adj.P.Val<10e-5,]})
tmodPanelPlot(res.l2, pie=pie, text.cex=0.6) ##### zero = grey, blue down in the first factor and red up in the first